% PatentFigures.m  
%
%   Read the patent data provided by Amit Seru (June 16, 2021)
%   and create basic figures. Takes about 40 seconds to load data.
%
%   Lookup patent codes: https://www.uspto.gov/web/patents/classification/cpc/html/cpc-D.html
%
%   First version: June 2021

clear all; close all;
diarychad('PatentFigures');

% Read the patent data
fname='patent_cpc_grant_date_raw.csv'; % Name of the file to read
tic
T=readtable(fname); %,'Sheet','Data');
s=summary(T)
chadtimer
definecolors

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% MAIN GROUPS
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

main_names={
'Human (A)'       % {'A'}
'Operations (B)'  % {'B'}
'Chemistry (C)'   % {'C'}
'Textiles (D)'    % {'D'}
'Construction (E)' % {'E'}
'Machines (F)'    % {'F'}
'Physics (G)'     % {'G'}
'Electricity (H)' % {'H'}
'Other (Y)'       % {'Y'}
}

groups=unique(T.main_cpc);
years =unique(T.grant_year);
NN=length(groups);
TT=length(years);

[G,IDcpc,IDyear]=findgroups(T.main_cpc,T.grant_year);
x         = splitapply(@length,T.patent_num,G); % Count number of patents in each group   855x1 need to reshape
patents_main=reshape(x,TT,NN);

% Reproduce Amit's figure
figure(1); figsetup; makefigwide;
plot(years,patents_main/1000);
chadfig2(' ','Patent grants (thousands)',1,0)
print('-depsc','PatentFigures_amit');

% Log scale
figure(2); figsetup; makefigwide;
plot(years,log(patents_main/1000));
chadfig2(' ','Patent grants (thousands)',1,0)
nums=[1 4 16 64 256 1024]';
relabelaxis(log(nums),num2str(nums),'y');
print('-depsc','PatentFigures_log');

% Select years for smoothing
myyrs=(1950:10:2020)';
myyrs(end)=2019; % Avoid covid
yr0=min(years)-1;
t=myyrs-yr0;
figure(3); figsetup; makefigwide;
plot(years(t),log(patents_main(t,:)/1000));
chadfig2(' ','Patent grants (thousands)',1,0)
nums=[1 4 16 64 256 1024]';
relabelaxis(log(nums),num2str(nums),'y');
print('-depsc','PatentFigures_smoothed');

% Growth rates for select periods
gyrs=[1950 1990
      1950 2019];
growth=growthrate(patents_main,gyrs-yr0)';
disp ' '; disp ' ';
disp 'Average annual growth rate of patent grants'
cshow(main_names,growth*100,'%14.2f','1950-1990 1950-2019','latex')

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Results for Sub-classes, e.g. A43=Footwear, B61=Railways, F21=Lighting, H03=Basic electronic circuitry
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
disp ' ';
disp 'Processing the sub-classes; takes about 3 minutes...';
tic;
b=T.cpc;
%b=b(1:10000); % For practice
bb=cell(length(b),1);

for i=1:length(b);
  yy=char(b(i));
  bb(i)={yy(1:3)};
end;
chadtimer
T.class=bb;
class_names=unique(T.class);
NN=length(class_names);
fprintf('There are %4.0f sub-classes\n',NN)

[G,IDsub,IDyear]=findgroups(T.class,T.grant_year);
x         = splitapply(@length,T.patent_num,G); % Count number of patents in each group   855x1 need to reshape
patents_sub=zeros(TT,NN)*NaN;
% Reshape by hand to deal with missing values
for i=1:length(x);
  irow=find(years==IDyear(i));
  icol=find(ismember(class_names,IDsub(i)));
  patents_sub(irow,icol)=x(i);
  %keyboard
end;


% Show data: 1950 1990 2020 and growth rates for all 130 classes
Legend=readtable('cpc_names.xls','Sheet','Subsections');
Legend.Variables
class_titles=cell(NN,1);
for i=1:NN;
  icol=find(ismember(Legend.id,class_names(i)));
  blah=Legend.title{icol};
  class_titles(i)={[class_names{i} ' ' blah(1:min(length(blah),20))]};
end 

gr_sub=growthrate(patents_sub,gyrs-yr0)'*100;
myyrs=[1950 1990 2019]
blah=[patents_sub(myyrs-yr0,:)' gr_sub];
fmt='%12.0f %12.0f %12.0f %12.2f %12.2f';
disp 'Patents by class and average annual growth rates'
cshow(class_titles,blah,fmt,'1950 1990 2019 gr50-90 gr50-19');

% Bar chart, growth rates
grsort50_90=sort(gr_sub(:,1));
grsort50_19=sort(gr_sub(:,2));
figure(4); figsetup; makefigwide;
h1=bar(grsort50_19);
h2=bar(grsort50_90);
h1.FaceColor=myblue;
h2.FaceColor=mygreen;
%relabelaxis(1:NN,class_names,'x');
%xtickangle(-45)
set(gca,'XTick',[])
text(30,5,'1950 - 2019');
text(60,-1,'1950 - 1990');
chadfig2('Patent Classes','Growth Rate (percent)',1,0)
print('-depsc','PatentFigures_Classes');


% Edit labels for next graphs
class_plotnames=class_names;
indx=find(ismember(class_names,{'C14'})); class_plotnames{indx}='Leather/Pelts (C14)';
indx=find(ismember(class_names,{'H01'})); class_plotnames{indx}='Electric Components (H01)';
indx=find(ismember(class_names,{'B32'})); class_plotnames{indx}='Materials (B32)';
indx=find(ismember(class_names,{'G06'})); class_plotnames{indx}='Computing (G06)';
indx=find(ismember(class_names,{'D05'})); class_plotnames{indx}='Sewing (D05)';
indx=find(ismember(class_names,{'Y10'})); class_plotnames{indx}='Former USPC (Y10)';
indx=find(ismember(class_names,{'B43'})); class_plotnames{indx}='Writing implements (B43)';
indx=find(ismember(class_names,{'C30'})); class_plotnames{indx}='Crystals (C30)';
indx=find(ismember(class_names,{'C07'})); class_plotnames{indx}='O. Chemistry (C07)';
indx=find(ismember(class_names,{'G12'})); class_plotnames{indx}='Other instruments (G12)';
indx=find(ismember(class_names,{'B61'})); class_plotnames{indx}='Railways (B61)';
indx=find(ismember(class_names,{'D06'})); class_plotnames{indx}='Textile treatments (D06)';
indx=find(ismember(class_names,{'C13'})); class_plotnames{indx}='Sugar (C13)';
%indx=find(ismember(class_names,{''})); class_plotnames{indx}='';

% Growth rate versus level in a middle year?
gr50_90=gr_sub(:,1);
gr50_19=gr_sub(:,2);
nums=5.^(1:6);

%addpath('/home/chad/matlab/export_fig')
FS=16;
FSb=14;
figure(5); figsetup; makefigwide;
greygrid(log(nums),0);
h=plotnamesym2(log(patents_sub(1980-yr0,:))',gr50_19,class_plotnames,12,[],1,1.5);
labs=strmat('  5  #  25 # 125 # 625 # 3125#15625','#'); %num2str(nums');
relabelaxis(log(nums),labs,'x');
pnums=(-2:2:10);
plabs=strmat('-2% 0% 2% 4% 6% 8% 10%');
relabelaxis(pnums,plabs,'y');
set(gca,'FontSize',FS);
chadfig2('Patent Grants in 1980','Growth rate, 1950-2019',1,0,FSb);
wait('Edit figure and press any key to continue')
%print('-depsc','PatentFigures_Growth2019');
%export_fig PatentFigures_Growth2019'); -transparent
exportgraphics(gcf,'PatentFigures_Growth2019.eps','ContentType','vector')


% Growth rate versus level in a middle year? 1950-1990
figure(6); figsetup; makefigwide;
greygrid(log(nums),0);
plotnamesym2(log(patents_sub(1970-yr0,:))',gr50_90,class_plotnames,12,[],1,1.5);
relabelaxis(log(nums),labs,'x');
pnums=(-4:2:12);
plabs=strmat('-4% -2% 0% 2% 4% 6% 8% 10% 12%');
relabelaxis(pnums,plabs,'y');
set(gca,'FontSize',FS);
chadfig2('Patent Grants in 1970','Growth rate, 1950-1990',1,0,FSb);
wait('Edit figure and press any key to continue')
%print('-depsc','PatentFigures_Growth1990');
%export_fig PatentFigures_Growth1990.eps -transparent
exportgraphics(gcf,'PatentFigures_Growth1990.eps','ContentType','vector')

% Compute weighted average of the growth rates
weights1970=patents_sub(1970-yr0,:)'/nansum(patents_sub(1970-yr0,:));
AvgGrowth1950_1990=nansum(gr50_90.*weights1970);
fprintf('The weighted average of patent growth, 1950-1990, is %6.2f percent\n',AvgGrowth1950_1990)
weights1980=patents_sub(1980-yr0,:)'/nansum(patents_sub(1980-yr0,:));
AvgGrowth1950_2019=nansum(gr50_19.*weights1980);
fprintf('The weighted average of patent growth, 1950-2019, is %6.2f percent\n',AvgGrowth1950_2019)


diary off;

